Purpose

This document will explore the spatial analyses of the data

Required packages and data

library(PramCurry)
## Loading required package: poppr
## Loading required package: adegenet
## Loading required package: ade4
##    ==========================
##     adegenet 1.4-2 is loaded
##    ==========================
## 
##  - to start, type '?adegenet'
##  - to browse adegenet website, type 'adegenetWeb()'
##  - to post questions/comments: adegenet-forum@lists.r-forge.r-project.org
## 
## 
## This is poppr version 1.1.2.99.70. To get started, type package?poppr
library(poppr)
library(reshape2)
library(ggplot2)
data(ramdat)
data(pop_data)
options(stringsAsFactors = FALSE)
sessionInfo()
## R version 3.1.2 (2014-10-31)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggplot2_1.0.0     reshape2_1.4      PramCurry_1.0     poppr_1.1.2.99-70
## [5] adegenet_1.4-2    ade4_1.6-2        rgdal_0.8-16      sp_1.0-15        
## 
## loaded via a namespace (and not attached):
##  [1] ape_3.1-4           assertthat_0.1      bitops_1.0-6       
##  [4] boot_1.3-11         caTools_1.17.1      colorspace_1.2-4   
##  [7] digest_0.6.4        dplyr_0.2           evaluate_0.5.5     
## [10] fastmatch_1.0-4     formatR_1.0         ggmap_2.3          
## [13] grid_3.1.2          gtable_0.1.2        htmltools_0.2.6    
## [16] httpuv_1.3.2        igraph_0.7.1        knitr_1.6          
## [19] lattice_0.20-29     mapproj_1.2-2       maps_2.3-9         
## [22] MASS_7.3-35         Matrix_1.1-4        munsell_0.4.2      
## [25] nlme_3.1-117        packrat_0.4.1-1     parallel_3.1.2     
## [28] pegas_0.6           permute_0.8-3       phangorn_1.99-7    
## [31] plyr_1.8.1          png_0.1-7           proto_0.3-10       
## [34] Rcpp_0.11.3         RgoogleMaps_1.2.0.6 rjson_0.2.14       
## [37] RJSONIO_1.3-0       rmarkdown_0.3.10    scales_0.2.4       
## [40] shiny_0.10.1        stringr_0.6.2       tools_3.1.2        
## [43] vegan_2.0-10        xtable_1.7-4        yaml_2.1.13

Mantel Tests

The purpose of a mantel test is to test the hypothesis that genetic distance is correlated with geographic distance. As we have defined this data by year and by watershed, we will perform the mantel test on 4 scales:

  1. Overall
  2. By Year
  3. By Watershed
  4. By Year with respect to Watershed
options(digits = 10)
newReps <- other(ramdat)$REPLEN
(newReps[3] <- 4) # Tetranucleotide repeat
## [1] 4
(newReps <- fix_replen(ramdat, newReps))
##  PrMS6A1  Pr9C3A1 PrMS39A1 PrMS45A1 PrMS43A1 
##  3.00000  2.00000  4.00001  4.00000  4.00000
bdist  <- bruvo.dist(ramdat, replen = newReps)#other(ramdat)$REPLEN)
gdist  <- dist(pop_data[, c("LAT", "LON")])

1. Overall

system.time(overall_mantel <- spatial_stats(ramdat, 
                                xy = gdist, 
                                hierarchy = NULL, 
                                distance = bdist,
                                sample = 99999,
                                seed = 9001))

plot of chunk overall

##    user  system elapsed 
##  42.631   0.312  43.053
overall_mantel
## Monte-Carlo test
## Call: mantel.randtest(m1 = dist_xy, m2 = dist_gen, nrepet = sample)
## 
## Observation: 0.5234581822 
## 
## Based on 99999 replicates
## Simulated p-value: 1e-05 
## Alternative hypothesis: greater 
## 
##          Std.Obs      Expectation         Variance 
##  1.991809716e+01 -8.695085620e-06  6.906892877e-04
overall_lm <- spatial_stats(ramdat, 
                            xy = gdist, 
                            hierarchy = NULL, 
                            distance = bdist, 
                            stat = "lm")

plot of chunk overall

summary(overall_lm)
## 
## Call:
## lm(formula = dist_xy_vec ~ dist_gen_vec)
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -0.31377179 -0.05563021 -0.01119471  0.05422089  0.27984652 
## 
## Coefficients:
##                  Estimate   Std. Error   t value   Pr(>|t|)    
## (Intercept)  0.0337124914 0.0004386434  76.85626 < 2.22e-16 ***
## dist_gen_vec 0.6449916969 0.0028970938 222.63404 < 2.22e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08497729 on 131326 degrees of freedom
## Multiple R-squared:  0.2740085,  Adjusted R-squared:  0.2740029 
## F-statistic: 49565.92 on 1 and 131326 DF,  p-value: < 2.2204e-16

Without Cape Sebastian

noseb <- popsub(setpop(ramdat, ~ZONE2), blacklist = "HunterCr")
bdist.noseb <- bruvo.dist(noseb, replen = newReps)
gdist.noseb <- dist(other(noseb)$xy)
system.time(overall_mantel_noseb <- spatial_stats(noseb, 
                                                  xy = gdist.noseb, 
                                                  hierarchy = NULL, 
                                                  distance = bdist.noseb,
                                                  sample = 99999,
                                                  seed = 9001))

plot of chunk overall_noseb

##    user  system elapsed 
##  25.778   0.182  26.014
overall_mantel_noseb
## Monte-Carlo test
## Call: mantel.randtest(m1 = dist_xy, m2 = dist_gen, nrepet = sample)
## 
## Observation: 0.1747198509 
## 
## Based on 99999 replicates
## Simulated p-value: 1e-05 
## Alternative hypothesis: greater 
## 
##          Std.Obs      Expectation         Variance 
##  5.4675869829040 -0.0000732971456  0.0010220153208
overall_lm_noseb <- spatial_stats(noseb, 
                                  xy = gdist.noseb, 
                                  hierarchy = NULL, 
                                  distance = bdist.noseb, 
                                  stat = "lm")

plot of chunk overall_noseb

summary(overall_lm_noseb)
## 
## Call:
## lm(formula = dist_xy_vec ~ dist_gen_vec)
## 
## Residuals:
##          Min           1Q       Median           3Q          Max 
## -0.109238802 -0.040784800 -0.009252208  0.034635346  0.204460118 
## 
## Coefficients:
##                  Estimate   Std. Error   t value   Pr(>|t|)    
## (Intercept)  0.0588800416 0.0002875805 204.74282 < 2.22e-16 ***
## dist_gen_vec 0.1236855136 0.0022077129  56.02427 < 2.22e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05130993 on 99679 degrees of freedom
## Multiple R-squared:  0.03052703, Adjusted R-squared:  0.0305173 
## F-statistic: 3138.719 on 1 and 99679 DF,  p-value: < 2.2204e-16

2. By Year

system.time(by_year <- spatial_stats(ramdat, 
                                     xy = gdist,
                                     hierarchy = ~Pop, 
                                     distance = bdist, 
                                     sample = 99999,
                                     seed = 9001))

plot of chunk by_year

##    user  system elapsed 
##   7.775   0.388   8.176
by_year_lm <- spatial_stats(ramdat, xy = gdist, hierarchy = ~Pop, distance = bdist, 
                         stat = "lm")

plot of chunk by_year

3. By Watershed

system.time(by_zone <- spatial_stats(ramdat, 
                                     xy = gdist, 
                                     hierarchy = ~ZONE2, 
                                     distance = bdist, 
                                     sample = 99999,
                                     seed = 9001))

plot of chunk by_watershed

##    user  system elapsed 
##  10.584   0.263  10.866
by_zone_lm <- spatial_stats(ramdat, xy = gdist, hierarchy = ~ZONE2, 
                         distance = bdist, stat = "lm")

plot of chunk by_watershed

4. By Year with respect to Watershed

system.time(by_yearzone <- spatial_stats(ramdat, 
                                         xy = gdist, 
                                         hierarchy = ~ZONE2/Pop,
                                         distance = bdist, 
                                         sample = 99999,
                                         seed = 9001,
                                         ncol = 5))

plot of chunk by_yearwatershed

##    user  system elapsed 
##   5.826   0.726   6.564
by_yearzone_lm <- spatial_stats(ramdat, xy = gdist, hierarchy = ~ZONE2/Pop, 
                         distance = bdist, stat = "lm", ncol = 5)

plot of chunk by_yearwatershed

Summary Table

get_summary <- function(x, value){
  vapply(x, "[[", numeric(1), value)
}

split_name <- function(x){
  strsplit(x, "_")[[1]]
}

resmat <- matrix(nrow = length(by_zone) + 1, ncol = length(by_year) + 1,
                 dimnames = list(Region = c(names(by_zone), "Pooled"),
                                 Year   = c(names(by_year), "Pooled"))
                 )
resmat.p <- resmat
resmat["Pooled", ]   <- c(get_summary(by_year, "obs"), overall_mantel$obs)
resmat.p["Pooled", ] <- c(get_summary(by_year, "pvalue"), overall_mantel$pvalue)

resmat[, "Pooled"]   <- c(get_summary(by_zone, "obs"), overall_mantel$obs)
resmat.p[, "Pooled"] <- c(get_summary(by_zone, "pvalue"), overall_mantel$pvalue)

yz.obs <- get_summary(by_yearzone, "obs")
yz.p   <- get_summary(by_yearzone, "pvalue")
for (i in names(yz.obs)){
  ndex <- split_name(i)
  resmat[ndex[1], ndex[2]]   <- yz.obs[i]
  resmat.p[ndex[1], ndex[2]] <- yz.p[i]
}

charmat <- apply(resmat, 2, function(x) as.character(round(x, 2)))
charmat.p <- ifelse(resmat.p > 0.1, "", 
                    ifelse(resmat.p > 0.05, ".", 
                           ifelse(resmat.p > 0.01, "*", 
                                  ifelse(resmat.p > 0.001, "**", "***"))))

charmatnew <- charmat
charmatnew[] <- paste0(charmat, charmat.p)
charmatnew[] <- paste0(charmat, charmat.p)
charmatnew[grep("NANA", charmatnew)] <- NA
charmatnew[grep("NaN", charmatnew)] <- NaN
dimnames(charmatnew) <- dimnames(resmat)
charmatnew
##             Year
## Region       2001   2002    2003      2004      2005  2006  2010 
##   JHallCr    "0.06" "0.24." "0.14***" "0.28***" "NaN" NA    NA   
##   NFChetHigh NA     NA      "NaN"     NA        NA    NA    NA   
##   Coast      NA     NA      NA        NA        NA    "NaN" "NaN"
##   HunterCr   NA     NA      NA        NA        NA    NA    NA   
##   Winchuck   NA     NA      NA        NA        NA    NA    NA   
##   ChetcoMain NA     NA      NA        NA        NA    NA    NA   
##   PistolRSF  NA     NA      NA        NA        NA    NA    NA   
##   Pooled     "0.06" "0.24." "0.13***" "0.28***" "NaN" "NaN" "NaN"
##             Year
## Region       2011      2012      2013      2014    Pooled   
##   JHallCr    NA        NA        "0.18**"  "NaN"   "0.14***"
##   NFChetHigh NA        "0.68."   "0.41***" "-0.23" "0.35***"
##   Coast      "NaN"     "NaN"     "0.55*"   "-0.25" "0.13."  
##   HunterCr   "0.06"    NA        NA        NA      "0.06"   
##   Winchuck   NA        "0.41**"  "0.03"    NA      "0.11"   
##   ChetcoMain NA        NA        "0.53."   "NaN"   "0.63*"  
##   PistolRSF  NA        NA        "0.94"    NA      "0.94"   
##   Pooled     "0.87***" "0.59***" "0.15***" "0.14*" "0.52***"
write.table(charmatnew, file = "mantel_table.csv", sep = ",", row.names = TRUE,
            col.names = NA, na = "-")